{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n===================================================\nLabel Propagation digits: Demonstrating performance\n===================================================\n\nThis example demonstrates the power of semisupervised learning by\ntraining a Label Spreading model to classify handwritten digits\nwith sets of very few labels.\n\nThe handwritten digit dataset has 1797 total points. The model will\nbe trained using all points, but only 30 will be labeled. Results\nin the form of a confusion matrix and a series of metrics over each\nclass will be very good.\n\nAt the end, the top 10 most uncertain predictions will be shown.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "print(__doc__)\n\n# Authors: Clay Woolam <clay@woolam.org>\n# License: BSD\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom scipy import stats\n\nfrom sklearn import datasets\nfrom sklearn.semi_supervised import LabelSpreading\n\nfrom sklearn.metrics import confusion_matrix, classification_report\n\ndigits = datasets.load_digits()\nrng = np.random.RandomState(2)\nindices = np.arange(len(digits.data))\nrng.shuffle(indices)\n\nX = digits.data[indices[:340]]\ny = digits.target[indices[:340]]\nimages = digits.images[indices[:340]]\n\nn_total_samples = len(y)\nn_labeled_points = 40\n\nindices = np.arange(n_total_samples)\n\nunlabeled_set = indices[n_labeled_points:]\n\n# #############################################################################\n# Shuffle everything around\ny_train = np.copy(y)\ny_train[unlabeled_set] = -1\n\n# #############################################################################\n# Learn with LabelSpreading\nlp_model = LabelSpreading(gamma=.25, max_iter=20)\nlp_model.fit(X, y_train)\npredicted_labels = lp_model.transduction_[unlabeled_set]\ntrue_labels = y[unlabeled_set]\n\ncm = confusion_matrix(true_labels, predicted_labels, labels=lp_model.classes_)\n\nprint(\"Label Spreading model: %d labeled & %d unlabeled points (%d total)\" %\n      (n_labeled_points, n_total_samples - n_labeled_points, n_total_samples))\n\nprint(classification_report(true_labels, predicted_labels))\n\nprint(\"Confusion matrix\")\nprint(cm)\n\n# #############################################################################\n# Calculate uncertainty values for each transduced distribution\npred_entropies = stats.distributions.entropy(lp_model.label_distributions_.T)\n\n# #############################################################################\n# Pick the top 10 most uncertain labels\nuncertainty_index = np.argsort(pred_entropies)[-10:]\n\n# #############################################################################\n# Plot\nf = plt.figure(figsize=(7, 5))\nfor index, image_index in enumerate(uncertainty_index):\n    image = images[image_index]\n\n    sub = f.add_subplot(2, 5, index + 1)\n    sub.imshow(image, cmap=plt.cm.gray_r)\n    plt.xticks([])\n    plt.yticks([])\n    sub.set_title('predict: %i\\ntrue: %i' % (\n        lp_model.transduction_[image_index], y[image_index]))\n\nf.suptitle('Learning with small amount of labeled data')\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.6.9"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}